Using Advanced Search to Bulk Export

Sample Script

The following Python script illustrates how to execute a search that spans multiple pages and save the results to a CSV file. For a more general overview of using the advanced search to perform bulk exports, refer to this article on the Integration page.

"""
    Example script to run search profile (Assetic.SearchGetAPI.py)
    Search is paginated so set page size.  On a per page basic the search
    results omit columns where all records are null for that column. This is
    catered for by building a list of columns returned by all pages
"""
import assetic
import csv
import functools
import sys

# Assetic SDK instance
asseticsdk = assetic.AsseticSDK("c:/users/you/assetic.ini", None, "Info")

# Search API
sapi = assetic.SearchApi()

# Define search parameters
searchguid = "f9149c1b-0a43-e611-945f-06edd62954d7"
numpagesize = 50
# Return only those records where Asset Class = "Transport"
searchfilter = "ComplexAssetClass=Transport"

# set search criteria as keyword args
kw = {"request_params_id":searchguid,
    "request_params_page":1,
    "request_params_sorts":"ComplexAssetName-desc",
    "request_params_page_size":numpagesize,
    "request_params_filters":searchfilter}

# Get first page of results
try:
    sg = sapi.search_get(**kw)
except assetic.rest.ApiException as e:
    asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
        e.status, e.reason, e.body))
    # Cannot continue without a first page of results
    sys.exit(1)
# Log the number of records and pages
totalresults = sg.get("TotalResults")
numpages = sg.get("TotalPages")
asseticsdk.logger.info("Total Results: {0}, Total Pages: {1}".format(
    totalresults, numpages))

# get data from nested output
resourcelist = sg.get("ResourceList")
resource = resourcelist[0]
data = resource.get("Data")

# Keep a reference as "alldata" because we will be appending to this
alldata = data

# Get a list of columns
columns = map( lambda x: x.keys(), data )
if sys.version_info < (3,0):
    columns = reduce( lambda x,y: x+y, columns )
else:
    columns = functools.reduce( lambda x,y: x|y, columns )

# Write list of columns to the "all" list as this may grow
allcolumns = columns

# Now loop through remaining pages
if numpages > 1:
    for pagenum in range(2,int(numpages) + 1):
        # set page number to get
        kw["request_params_page"]=pagenum
        asseticsdk.logger.info("Page: {0}".format(kw["request_params_page"]))

        # Now get results for this page
        try:
            sg = sapi.search_get(**kw)
        except assetic.rest.ApiException as e:
            asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
                e.status, e.reason, e.body))
            # Stop rather than re-process the previous page's response
            break
        # get actual data from nested output
        resourcelist = sg.get("ResourceList")
        resource = resourcelist[0]
        data = resource.get("Data")

        # Get column list for this page - there may be new columns
        columns = map(lambda x: x.keys(), data)
        if sys.version_info < (3, 0):
            columns = reduce(lambda x,y: x+y, columns)
            # Add new column list to "allcolumns", will get unique list later
            allcolumns = allcolumns + columns
            # append new data to "alldata"
            alldata = alldata + data
        else:
            columns = functools.reduce(lambda x,y: x|y, columns)
            # merge column list sets
            allcolumns = allcolumns | columns
            # append new data to "alldata"
            alldata.extend(data)

        if pagenum > 10:
            # Safety limit for this example - remove to export every page
            break


if sys.version_info < (3, 0):
    # get unique list of columns
    columns = list(set(allcolumns))
else:
    columns = allcolumns

# create csv
if sys.version_info < (3, 0):
    with open("c:/temp/road_dump.csv", "wb") as out_file:
        csv_w = csv.writer(out_file)
        csv_w.writerow(columns)

        for i_r in alldata:
            # map data to column list by key to avoid potential issues
            # with column order
            csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
else:
    with open("c:/temp/road_dump.csv", "w", newline="") as out_file:
        csv_w = csv.writer(out_file)
        csv_w.writerow(columns)

        for i_r in alldata:
            # map data to column list by key to avoid potential issues
            # with column order
            csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
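
Note that the merged column collection is a set in Python 3 (and an unordered list in Python 2), so the column order in the CSV may differ between runs. A minimal tweak, assuming an alphabetical order is acceptable, is to sort the columns before writing the file:

# Optional: sort the merged columns so the CSV column order is stable
# between runs (iteration order of a set is otherwise arbitrary)
columns = sorted(allcolumns)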

How it works

First, the search parameters are defined. The searchguid is the unique ID of the search profile that will be exported (refer to this article). A page size of 50 records is set, and a filter restricts the search results to only those records where the Asset Class is "Transport".

# Define search parameters
searchguid = "f9149c1b-0a43-e611-945f-06edd62954d7"
numpagesize = 50
# Return only those records where Asset Class = "Transport"
searchfilter = "ComplexAssetClass=Transport"

The parameter "request_params_page" is set to 1 so that the first page of results is returned. The number of records per page is set by "request_params_page_size". The results are ordered by Asset Name in descending order, set by "request_params_sorts", and filtered by "request_params_filters".

# set search criteria as keyword args
kw = {"request_params_id":searchguid,
    "request_params_page":1,
    "request_params_sorts":"ComplexAssetName-desc",
    "request_params_page_size":numpagesize,
    "request_params_filters":searchfilter}	

The search is then executed and the array of data extracted from the nested response.

# Get first page of results
sg = sapi.search_get(**kw)

# get actual data from nested output
resourcelist = sg.get('ResourceList')
resource = resourcelist[0]
data = resource.get('Data')

Each subsequent page may return a different set of fields, because columns that are null for every record on a page are omitted. The following code block caters for this by building a cumulative list of columns and holding an array of data (field name/value pairs).

# Keep a reference as alldata because we will be appending to this
alldata = data

# Get a list of columns
columns = map( lambda x: x.keys(), data )
columns = functools.reduce( lambda x,y: x|y, columns )

# Write list of columns to the 'all' list as this may grow
allcolumns = columns
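
To see how the column merge works, consider a minimal Python 3 illustration (the records and values below are made up for the example). Because dict.keys() returns a set-like view, reducing with the | operator yields the union of the field names across all records:

import functools

# Two illustrative records that return different fields
page_data = [
    {"ComplexAssetName": "Main St", "ComplexAssetClass": "Transport"},
    {"ComplexAssetName": "High St", "ZoneName": "North"},
]
cols = functools.reduce(lambda x, y: x | y, map(lambda r: r.keys(), page_data))
# cols == {"ComplexAssetName", "ComplexAssetClass", "ZoneName"}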

Subsequent pages are then requested in a loop, using the total page count returned by the first request to determine the number of iterations.

    for pagenum in range(2,int(numpages) + 1):
        # set page number to get
        kw['request_params_page']=pagenum
        asseticsdk.logger.info('Page: {0}'.format(kw['request_params_page']))

        # Now get results for this page
        try:
            sg = sapi.search_get(**kw)
        except assetic.rest.ApiException as e:
            asseticsdk.logger.error("Status {0}, Reason: {1} {2}".format(
                e.status, e.reason, e.body))
            # Stop rather than re-process the previous page's response
            break

The data array for each page is appended to the "alldata" array, and the cumulative column list "allcolumns" is updated to include any new columns returned in the page.

        # get actual data from nested output
        resourcelist = sg.get('ResourceList')
        resource = resourcelist[0]
        data = resource.get('Data')

        # append new data to 'alldata'
        alldata = alldata + data
        # Get column list for this page - there may be new columns
        columns = map(lambda x: x.keys(), data)
        columns = functools.reduce(lambda x,y: x|y, columns)
        # merge column list sets
        allcolumns = allcolumns | columns

Using the unique list of columns and the key/value pairs in the data array, the CSV file is created. Mapping each record to the column list by key ensures values are written to the correct fields; because each page can return a different set of fields, simply writing the data to file page by page would misalign the columns.

# create csv
with open('c:/temp/road_dump.csv', 'w', newline='') as out_file:
    csv_w = csv.writer(out_file)
    csv_w.writerow(columns)

    for i_r in alldata:
        # map data to column list by key to avoid potential issues with column order
        csv_w.writerow(map(lambda x: i_r.get(x, ""), columns))
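
As a quick illustration of the row mapping (again with made-up values), a record missing one of the merged columns is written with an empty cell rather than shifting the remaining values into the wrong fields:

columns = ["ComplexAssetName", "ComplexAssetClass", "ZoneName"]
record = {"ComplexAssetName": "Main St", "ComplexAssetClass": "Transport"}
row = list(map(lambda x: record.get(x, ""), columns))
# row == ["Main St", "Transport", ""] - the missing field is a blank cell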